{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\r\n",
      "第一回     灵根育孕源流出　心性修持大道生\r\n",
      "\r\n",
      "\r\n",
      "　　诗曰：\r\n",
      "　　　　混沌未分天地乱，茫茫渺渺无人见。\r\n",
      "　　　　自从盘古破鸿蒙，开辟从兹清浊辨。\r\n",
      "　　　　覆载群生仰至仁，发明万物皆成善。\r\n",
      "　　　　欲知造化会元功，须看西游释厄传。\r\n",
      "\r\n",
      "\r\n",
      "盖闻天地之数，有十二万九千六百岁为一元。将一元分为十二会，乃子、丑、寅\r\n",
      "、卯、辰、巳、午、未、申、酉、戌、亥之十二支也。每会该一万八百岁。且就\r\n",
      "一日而论：子时得阳气，而丑则鸡鸣﹔寅不通光，而卯则日出﹔辰时食后，而巳\r\n",
      "则挨排﹔日午天中，而未则西蹉﹔申时晡，而日落酉，戌黄昏，而人定亥。譬于\r\n",
      "大数，若到戌会之终，则天地昏曚而万物否矣。再去五千四百岁，交亥会之初，\r\n",
      "则当黑暗，而两间人物俱无矣，故曰混沌。又五千四百岁，亥会将终，贞下起元\r\n",
      "，近子之会，而复逐渐开明。邵康节曰：：“冬至子之半，天心无改移。一阳初\r\n",
      "动处，万物未生时。”到\n"
     ]
    }
   ],
   "source": [
    "from urllib.request import urlopen\n",
    "from zhconv import convert     #导入convert()函数，用于转换繁体字\n",
    "#网络在线古腾堡语料库中的《西游记》网址\n",
    "url='https://www.gutenberg.org/files/23962/23962-0.txt'\n",
    "html=urlopen(url).read()     #读取url内的html文本\n",
    "html=html.decode('utf-8')    #以指定方式解码字符串\n",
    "#提取《西游记》的部分内容，将其转换为简体字\n",
    "html=convert(html[600:1000],'zh-hans')\n",
    "print(html)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
